/*
 * Copyright (c) 2007, 2015, Oracle and/or its affiliates. All rights reserved.
 * ORACLE PROPRIETARY/CONFIDENTIAL. Use is subject to license terms.
 */
/*
 * The Apache Software License, Version 1.1
 *
 *
 * Copyright (c) 1999-2004 The Apache Software Foundation.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * 3. The end-user documentation included with the redistribution,
 *    if any, must include the following acknowledgment:
 *       "This product includes software developed by the
 *        Apache Software Foundation (http://www.apache.org/)."
 *    Alternately, this acknowledgment may appear in the software itself,
 *    if and wherever such third-party acknowledgments normally appear.
 *
 * 4. The names "Xerces" and "Apache Software Foundation" must
 *    not be used to endorse or promote products derived from this
 *    software without prior written permission. For written
 *    permission, please contact apache@apache.org.
 *
 * 5. Products derived from this software may not be called "Apache",
 *    nor may "Apache" appear in their name, without prior written
 *    permission of the Apache Software Foundation.
 *
 * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESSED OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED.  IN NO EVENT SHALL THE APACHE SOFTWARE FOUNDATION OR
 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
 * USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
 * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 * ====================================================================
 *
 * This software consists of voluntary contributions made by many
 * individuals on behalf of the Apache Software Foundation and was
 * originally based on software copyright (c) 1999, International
 * Business Machines, Inc., http://www.apache.org.  For more
 * information on the Apache Software Foundation, please see
 * <http://www.apache.org/>.
 */

package com.sun.org.apache.xerces.internal.impl;

import java.io.IOException;

import com.sun.org.apache.xerces.internal.impl.msg.XMLMessageFormatter;
import com.sun.org.apache.xerces.internal.util.XML11Char;
import com.sun.org.apache.xerces.internal.util.XMLChar;
import com.sun.org.apache.xerces.internal.util.XMLStringBuffer;
import com.sun.org.apache.xerces.internal.xni.XMLString;
import com.sun.org.apache.xerces.internal.xni.XNIException;

/**
 * This class is responsible for scanning XML document structure
 * and content. The scanner acts as the source for the document
 * information which is communicated to the document handler.
 * <p>
 * This variant specializes the base document scanner for XML 1.1: the
 * character-classification hooks delegate to {@link XML11Char}, both the
 * "1.0" and "1.1" version strings are accepted, and the characters
 * 0x85 (NEL) and 0x2028 (LINE SEPARATOR) are handled alongside CR/LF
 * in content, attribute values and public identifiers.
 * <p>
 * This component requires the following features and properties from the
 * component manager that uses it:
 * <ul>
 * <li>http://xml.org/sax/features/namespaces</li>
 * <li>http://xml.org/sax/features/validation</li>
 * <li>http://apache.org/xml/features/nonvalidating/load-external-dtd</li>
 * <li>http://apache.org/xml/features/scanner/notify-char-refs</li>
 * <li>http://apache.org/xml/features/scanner/notify-builtin-refs</li>
 * <li>http://apache.org/xml/properties/internal/symbol-table</li>
 * <li>http://apache.org/xml/properties/internal/error-reporter</li>
 * <li>http://apache.org/xml/properties/internal/entity-manager</li>
 * <li>http://apache.org/xml/properties/internal/dtd-scanner</li>
 * </ul>
 *
 * @author Glenn Marcy, IBM
 * @author Andy Clark, IBM
 * @author Arnaud  Le Hors, IBM
 * @author Eric Ye, IBM
 * @version $Id: XML11DocumentScannerImpl.java,v 1.5 2010/08/04 20:59:09 joehw Exp $
 * @xerces.internal
 */
public class XML11DocumentScannerImpl
    extends XMLDocumentScannerImpl {


  /**
   * Scratch buffer: accumulates the normalized attribute value in
   * {@link #scanAttributeValue} and the normalized public ID in
   * {@link #scanPubidLiteral}.
   */
  private final XMLStringBuffer fStringBuffer = new XMLStringBuffer();

  /**
   * Scratch buffer: accumulates the non-normalized attribute value in
   * {@link #scanAttributeValue}.
   */
  private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer();

  /**
   * Scratch buffer: receives a scanned surrogate pair in
   * {@link #scanAttributeValue}.
   */
  private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer();

  //
  // Constructors
  //

  /**
   * Default constructor.
   */
  public XML11DocumentScannerImpl() {
    super();
  } // <init>()

  //
  // overridden methods
  //

  // XMLDocumentFragmentImpl methods

  /**
   * Scans element content and appends it to {@code content}.
   * <p>
   * If the entity scanner stops on CR, 0x85 or 0x2028, that character is
   * consumed and appended as-is. If it stops on {@code ']'}, the whole run of
   * {@code ']'} characters is consumed and appended, and a fatal
   * {@code CDEndInContent} error is reported when two or more of them are
   * directly followed by {@code '>'} (i.e. {@code "]]>"} in content).
   *
   * @param content The buffer that scanned content is appended to.
   * @return The character the entity scanner stopped on, or -1 when that
   *         character was one of the cases above and has already been consumed
   *         by this method.
   * @throws IOException  Propagated from the entity scanner.
   * @throws XNIException Propagated from error reporting.
   */
  protected int scanContent(XMLStringBuffer content) throws IOException, XNIException {

    fTempString.length = 0;
    int c = fEntityScanner.scanContent(fTempString);
    content.append(fTempString);

    if (c == '\r' || c == 0x85 || c == 0x2028) {
      // happens when there is the character reference &#13;
      // but scanContent doesn't do entity expansions...
      // is this *really* necessary???  - NG
      fEntityScanner.scanChar();
      content.append((char) c);
      c = -1;
    }

    if (c == ']') {
      content.append((char) fEntityScanner.scanChar());
      // remember where we are in case we get an endEntity before we
      // could flush the buffer out - this happens when we're parsing an
      // entity which ends with a ]
      fInScanContent = true;
      //
      // We work on a single character basis to handle cases such as:
      // ']]]>' which we might otherwise miss.
      //
      if (fEntityScanner.skipChar(']')) {
        content.append(']');
        while (fEntityScanner.skipChar(']')) {
          content.append(']');
        }
        if (fEntityScanner.skipChar('>')) {
          reportFatalError("CDEndInContent", null);
        }
      }
      fInScanContent = false;
      c = -1;
    }
    return c;

  } // scanContent():int

  /**
   * Scans an attribute value and normalizes whitespace converting all
   * whitespace characters to space characters.
   *
   * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'"
   *
   * @param value The XMLString to fill in with the value.
   * @param nonNormalizedValue The XMLString to fill in with the non-normalized value.
   * @param atName The name of the attribute being parsed (for error msgs).
   * @param checkEntities true if undeclared entities should be reported as VC violation, false if
   * undeclared entities should be reported as WFC violation.
   * @param eleName The name of element to which this attribute belongs.
   * @return true if the non-normalized and normalized value are the same
   * @throws IOException  Propagated from the entity scanner.
   * @throws XNIException Propagated from error reporting.
   *
   * <strong>Note:</strong> This method uses fStringBuffer, fStringBuffer2 and fStringBuffer3;
   * anything in them at the time of calling is lost.
   **/
  protected boolean scanAttributeValue(XMLString value,
      XMLString nonNormalizedValue,
      String atName,
      boolean checkEntities, String eleName)
      throws IOException, XNIException {
    // quote
    int quote = fEntityScanner.peekChar();
    if (quote != '\'' && quote != '"') {
      reportFatalError("OpenQuoteExpected", new Object[]{eleName, atName});
    }

    fEntityScanner.scanChar();
    // Entity depth at the opening quote; text seen at a different depth comes
    // from an expanded entity and is kept out of the non-normalized value.
    int entityDepth = fEntityDepth;

    int c = fEntityScanner.scanLiteral(quote, value);
    if (DEBUG_ATTR_NORMALIZATION) {
      System.out.println("** scanLiteral -> \""
          + value.toString() + "\"");
    }

    int fromIndex = 0;
    if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) {
      // Fast path: the literal ended at the closing quote and contains no
      // whitespace, so the non-normalized and normalized values are equal.
      nonNormalizedValue.setValues(value);
      int cquote = fEntityScanner.scanChar();
      if (cquote != quote) {
        reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
      }
      return true;
    }
    // Keep a copy of the raw text before whitespace is rewritten in place.
    fStringBuffer2.clear();
    fStringBuffer2.append(value);
    normalizeWhitespace(value, fromIndex);
    if (DEBUG_ATTR_NORMALIZATION) {
      System.out.println("** normalizeWhitespace -> \""
          + value.toString() + "\"");
    }
    if (c != quote) {
      // scanLiteral stopped on a character needing special handling; loop
      // until the closing quote is reached back at the original entity depth.
      fScanningAttribute = true;
      fStringBuffer.clear();
      do {
        fStringBuffer.append(value);
        if (DEBUG_ATTR_NORMALIZATION) {
          System.out.println("** value2: \""
              + fStringBuffer.toString() + "\"");
        }
        if (c == '&') {
          fEntityScanner.skipChar('&');
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append('&');
          }
          if (fEntityScanner.skipChar('#')) {
            // character reference
            if (entityDepth == fEntityDepth) {
              fStringBuffer2.append('#');
            }
            int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2);
            if (ch != -1) {
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value3: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            }
          } else {
            // general entity reference
            String entityName = fEntityScanner.scanName();
            if (entityName == null) {
              reportFatalError("NameRequiredInReference", null);
            } else if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(entityName);
            }
            if (!fEntityScanner.skipChar(';')) {
              reportFatalError("SemicolonRequiredInReference",
                  new Object[]{entityName});
            } else if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(';');
            }
            // The comparisons below use reference equality against the
            // predefined-entity symbol fields, exactly as in the original code.
            if (entityName == fAmpSymbol) {
              fStringBuffer.append('&');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value5: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            } else if (entityName == fAposSymbol) {
              fStringBuffer.append('\'');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value7: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            } else if (entityName == fLtSymbol) {
              fStringBuffer.append('<');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** value9: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            } else if (entityName == fGtSymbol) {
              fStringBuffer.append('>');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueB: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            } else if (entityName == fQuotSymbol) {
              fStringBuffer.append('"');
              if (DEBUG_ATTR_NORMALIZATION) {
                System.out.println("** valueD: \""
                    + fStringBuffer.toString()
                    + "\"");
              }
            } else {
              if (fEntityManager.isExternalEntity(entityName)) {
                reportFatalError("ReferenceToExternalEntity",
                    new Object[]{entityName});
              } else {
                if (!fEntityManager.isDeclaredEntity(entityName)) {
                  //WFC & VC: Entity Declared
                  if (checkEntities) {
                    if (fValidation) {
                      fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN,
                          "EntityNotDeclared",
                          new Object[]{entityName},
                          XMLErrorReporter.SEVERITY_ERROR);
                    }
                  } else {
                    reportFatalError("EntityNotDeclared",
                        new Object[]{entityName});
                  }
                }
                fEntityManager.startEntity(entityName, true);
              }
            }
          }
        } else if (c == '<') {
          reportFatalError("LessthanInAttValue",
              new Object[]{eleName, atName});
          fEntityScanner.scanChar();
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
        } else if (c == '%' || c == ']') {
          fEntityScanner.scanChar();
          fStringBuffer.append((char) c);
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
          if (DEBUG_ATTR_NORMALIZATION) {
            System.out.println("** valueF: \""
                + fStringBuffer.toString() + "\"");
          }
        }
        // note that none of these characters should ever get through
        // XML11EntityScanner.  Not sure why
        // this check was originally necessary.  - NG
        else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
          // Normalized value gets a space; the non-normalized value always
          // records '\n' here regardless of which line-end character was seen.
          fEntityScanner.scanChar();
          fStringBuffer.append(' ');
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append('\n');
          }
        } else if (c != -1 && XMLChar.isHighSurrogate(c)) {
          fStringBuffer3.clear();
          if (scanSurrogates(fStringBuffer3)) {
            fStringBuffer.append(fStringBuffer3);
            if (entityDepth == fEntityDepth) {
              fStringBuffer2.append(fStringBuffer3);
            }
            if (DEBUG_ATTR_NORMALIZATION) {
              System.out.println("** valueI: \""
                  + fStringBuffer.toString()
                  + "\"");
            }
          }
        } else if (c != -1 && isInvalidLiteral(c)) {
          reportFatalError("InvalidCharInAttValue",
              new Object[]{eleName, atName, Integer.toString(c, 16)});
          fEntityScanner.scanChar();
          if (entityDepth == fEntityDepth) {
            fStringBuffer2.append((char) c);
          }
        }
        // Scan the next literal run; it is appended to fStringBuffer at the
        // top of the next iteration (or after the loop for the final run).
        c = fEntityScanner.scanLiteral(quote, value);
        if (entityDepth == fEntityDepth) {
          fStringBuffer2.append(value);
        }
        normalizeWhitespace(value);
      } while (c != quote || entityDepth != fEntityDepth);
      fStringBuffer.append(value);
      if (DEBUG_ATTR_NORMALIZATION) {
        System.out.println("** valueN: \""
            + fStringBuffer.toString() + "\"");
      }
      value.setValues(fStringBuffer);
      fScanningAttribute = false;
    }
    nonNormalizedValue.setValues(fStringBuffer2);

    // quote
    int cquote = fEntityScanner.scanChar();
    if (cquote != quote) {
      reportFatalError("CloseQuoteExpected", new Object[]{eleName, atName});
    }
    return nonNormalizedValue.equals(value.ch, value.offset, value.length);
  } // scanAttributeValue()

  //
  // XMLScanner methods
  //
  // NOTE:  this is a carbon copy of the code in XML11DTDScannerImpl;
  // we need to override these methods in both places.
  // this needs to be refactored!!!  - NG

  /**
   * Scans public ID literal.
   *
   * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
   * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%]
   *
   * The returned string is normalized according to the following rule,
   * from http://www.w3.org/TR/REC-xml#dt-pubid:
   *
   * Before a match is attempted, all strings of white space in the public
   * identifier must be normalized to single space characters (#x20), and
   * leading and trailing white space must be removed.
   *
   * An empty or whitespace-only literal yields an empty (length 0) result.
   * An invalid character is reported as a fatal error but scanning continues
   * to the closing quote; the method then returns false.
   *
   * @param literal The string to fill in with the public ID literal.
   * @return True on success; false if the opening quote is missing, the
   *         literal is unterminated, or an invalid character was encountered.
   * @throws IOException  Propagated from the entity scanner.
   * @throws XNIException Propagated from error reporting.
   *
   * <strong>Note:</strong> This method uses fStringBuffer, anything in it at the time of calling is
   * lost.
   */
  protected boolean scanPubidLiteral(XMLString literal)
      throws IOException, XNIException {
    int quote = fEntityScanner.scanChar();
    if (quote != '\'' && quote != '"') {
      reportFatalError("QuoteRequiredInPublicID", null);
      return false;
    }

    fStringBuffer.clear();
    // skip leading whitespace
    boolean skipSpace = true;
    boolean dataok = true;
    while (true) {
      int c = fEntityScanner.scanChar();
      // REVISIT:  none of these except \n and 0x20 should make it past the entity scanner
      if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) {
        if (!skipSpace) {
          // take the first whitespace as a space and skip the others
          fStringBuffer.append(' ');
          skipSpace = true;
        }
      } else if (c == quote) {
        // skipSpace is also true when nothing has been appended yet (empty or
        // whitespace-only literal); only trim when there really is a trailing
        // space, otherwise the length would underflow to -1.
        if (skipSpace && fStringBuffer.length > 0) {
          // if we finished on a space let's trim it
          fStringBuffer.length--;
        }
        literal.setValues(fStringBuffer);
        break;
      } else if (XMLChar.isPubid(c)) {
        fStringBuffer.append((char) c);
        skipSpace = false;
      } else if (c == -1) {
        reportFatalError("PublicIDUnterminated", null);
        return false;
      } else {
        dataok = false;
        reportFatalError("InvalidCharInPublicID",
            new Object[]{Integer.toHexString(c)});
      }
    }
    return dataok;
  }

  /**
   * Normalize whitespace in an XMLString converting all whitespace
   * characters to space characters. The string's backing array is
   * modified in place.
   *
   * @param value The string to normalize.
   */
  protected void normalizeWhitespace(XMLString value) {
    replaceWhitespaceWithSpaces(value, 0);
  }

  /**
   * Normalize whitespace in an XMLString converting all whitespace
   * characters to space characters, starting at the given index. The
   * string's backing array is modified in place.
   *
   * @param value The string to normalize.
   * @param fromIndex Index, relative to {@code value.offset}, of the first
   *        character to examine.
   */
  protected void normalizeWhitespace(XMLString value, int fromIndex) {
    replaceWhitespaceWithSpaces(value, fromIndex);
  }

  /**
   * Shared implementation of both normalizeWhitespace overloads: overwrites
   * every character for which {@code XMLChar.isSpace} is true with
   * {@code ' '}, from {@code value.offset + fromIndex} to the end of the string.
   */
  private static void replaceWhitespaceWithSpaces(XMLString value, int fromIndex) {
    int end = value.offset + value.length;
    for (int i = value.offset + fromIndex; i < end; ++i) {
      if (XMLChar.isSpace(value.ch[i])) {
        value.ch[i] = ' ';
      }
    }
  }

  /**
   * Checks whether this string would be unchanged by normalization.
   *
   * @param value The string to inspect; it is not modified.
   * @return -1 if the value would be unchanged by normalization, otherwise the index (relative to
   * {@code value.offset}) of the first whitespace character which would be transformed.
   */
  protected int isUnchangedByNormalization(XMLString value) {
    int end = value.offset + value.length;
    for (int i = value.offset; i < end; ++i) {
      int c = value.ch[i];
      if (XMLChar.isSpace(c)) {
        return i - value.offset;
      }
    }
    return -1;
  }

  /**
   * Returns true if the given character is not valid with respect to the
   * version of XML understood by this scanner.
   *
   * @param value The character to check.
   * @return The result of {@code XML11Char.isXML11Invalid(value)}.
   */
  protected boolean isInvalid(int value) {
    return XML11Char.isXML11Invalid(value);
  } // isInvalid(int):  boolean

  /**
   * Returns true if the given character is not valid, or may not be used
   * outside a character reference, with respect to the version of XML
   * understood by this scanner.
   *
   * @param value The character to check.
   * @return The negation of {@code XML11Char.isXML11ValidLiteral(value)}.
   */
  protected boolean isInvalidLiteral(int value) {
    return !XML11Char.isXML11ValidLiteral(value);
  } // isInvalidLiteral(int):  boolean

  /**
   * Returns true if the given character is a valid nameChar with respect to
   * the version of XML understood by this scanner.
   *
   * @param value The character to check.
   * @return The result of {@code XML11Char.isXML11Name(value)}.
   */
  protected boolean isValidNameChar(int value) {
    return XML11Char.isXML11Name(value);
  } // isValidNameChar(int):  boolean

  /**
   * Returns true if the given character is a valid nameStartChar with
   * respect to the version of XML understood by this scanner.
   *
   * @param value The character to check.
   * @return The result of {@code XML11Char.isXML11NameStart(value)}.
   */
  protected boolean isValidNameStartChar(int value) {
    return XML11Char.isXML11NameStart(value);
  } // isValidNameStartChar(int):  boolean

  /**
   * Returns true if the given character is a valid NCName character with
   * respect to the version of XML understood by this scanner.
   *
   * @param value The character to check.
   * @return The result of {@code XML11Char.isXML11NCName(value)}.
   */
  protected boolean isValidNCName(int value) {
    return XML11Char.isXML11NCName(value);
  } // isValidNCName(int):  boolean

  /**
   * Returns true if the given character is a valid high surrogate for a
   * nameStartChar with respect to the version of XML understood by this
   * scanner.
   *
   * @param value The character to check.
   * @return The result of {@code XML11Char.isXML11NameHighSurrogate(value)}.
   */
  protected boolean isValidNameStartHighSurrogate(int value) {
    return XML11Char.isXML11NameHighSurrogate(value);
  } // isValidNameStartHighSurrogate(int):  boolean

  /**
   * Returns true if the given XML version string is supported by this
   * scanner, i.e. it is exactly "1.1" or "1.0".
   *
   * @param version The version string from the XML declaration; a null
   *        value is treated as unsupported rather than causing an exception.
   * @return true for "1.1" or "1.0", false otherwise.
   */
  protected boolean versionSupported(String version) {
    // Constant-first comparison so a null version yields false instead of an NPE.
    return "1.1".equals(version) || "1.0".equals(version);
  } // versionSupported(String):  boolean

  /**
   * Returns the error message key for unsupported versions of XML with
   * respect to the version of XML understood by this scanner.
   *
   * @return The key "VersionNotSupported11".
   */
  protected String getVersionNotSupportedKey() {
    return "VersionNotSupported11";
  } // getVersionNotSupportedKey: String

} // class XML11DocumentScannerImpl
